COVID-19 Data Analysis

ITALY REGIONS

Data repository: link

Jupyter Notebook repository: link

REGIONS DATA TABLE

In [1]:
import json
import requests
import datetime as dt

from IPython.display import Markdown, display

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.offline as pyo

from matplotlib.dates import date2num, num2date

from scipy.optimize import curve_fit
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd

pyo.init_notebook_mode()
In [2]:
def logistic(x, L, x0, k):
    """Evaluate the logistic curve L / (1 + exp(-k * (x - x0))).

    Parameters
    ----------
    x : scalar or array-like accepted by numpy
        Point(s) at which the curve is evaluated.
    L : float
        Numerator of the expression (the value approached for large x when k > 0).
    x0 : float
        Value of x at which the result equals L / 2.
    k : float
        Multiplier applied to (x - x0) inside the exponential.

    Returns
    -------
    Same shape as ``x`` (numpy broadcasting applies).
    """
    exponent = -k * (x - x0)
    return L / (1 + np.exp(exponent))
In [3]:
# Regional time series published as one JSON array of per-region, per-report records.
json_regions = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-json/dpc-covid19-ita-regioni.json"

# A timeout keeps the notebook from hanging forever on a stalled connection.
with requests.get(json_regions, timeout=30) as req:
    # Fail here with a clear HTTP error instead of trying to parse an error page as JSON.
    req.raise_for_status()
    # 'utf-8-sig' strips a leading byte-order mark if the payload has one.
    data = json.loads(req.content.decode('utf-8-sig'))
In [4]:
# Raw timestamp strings of the first and last records in the download.
first_stamp = data[0]["data"]
last_stamp = data[-1]["data"]

print("FIRST ENTRY DATE: {}".format(first_stamp))
print("LAST  ENTRY DATE: {}".format(last_stamp))

# Whole days elapsed between the first and last record.
stamp_format = "%Y-%m-%dT%H:%M:%S"
elapsed = (
    dt.datetime.strptime(last_stamp, stamp_format)
    - dt.datetime.strptime(first_stamp, stamp_format)
)
period = elapsed.days

print("COVERAGE: {} days".format(period))
print("CURRENT DATE IS: {}".format(dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
FIRST ENTRY DATE: 2020-02-24T18:00:00
LAST  ENTRY DATE: 2020-05-29T17:00:00
COVERAGE: 94 days
CURRENT DATE IS: 2020-05-30 17:33:51
In [5]:
# Shared time axis, filled by the ingestion loop.
x, _x = [], []  # x: datetime of each distinct report; _x: matching integer position

# Every container below is a dict keyed by region name; each value is a list
# with one number per report.

# Day-over-day changes: confirmed cases, deaths, recovered, currently infected.
yC, yD, yR, yP = {}, {}, {}, {}

# Cumulative values as reported: confirmed cases, deaths, recovered, currently infected.
TOTyC, TOTyD, TOTyR, TOTyP = {}, {}, {}, {}
# TOTyr = {}  # mortality rate
# TOTyk = {}  # recovery rate

# Breakdown of the infected: "ricoverati" (hospitalised with symptoms),
# "intensiva" (intensive care), "isolamento" (home isolation).
yPric, yPint, yPiso = {}, {}, {}           # day-over-day changes
TOTyPric, TOTyPint, TOTyPiso = {}, {}, {}  # reported values
In [6]:
# Each row ties a day-over-day dict to its cumulative dict and to the JSON
# field both are derived from.
series_by_field = (
    (yC, TOTyC, "totale_casi"),
    (yD, TOTyD, "deceduti"),
    (yR, TOTyR, "dimessi_guariti"),
    (yP, TOTyP, "totale_positivi"),
    # details
    (yPric, TOTyPric, "ricoverati_con_sintomi"),
    (yPint, TOTyPint, "terapia_intensiva"),
    (yPiso, TOTyPiso, "isolamento_domiciliare"),
)

for entry in data:

    # x values: record each distinct timestamp once, with its 0-based position
    date = dt.datetime.strptime(entry["data"], "%Y-%m-%dT%H:%M:%S")
    if date not in x:
        _x.append(len(x))
        x.append(date)

    region = entry["denominazione_regione"]

    for daily, total, field in series_by_field:
        value = entry[field]
        if field == "dimessi_guariti":
            # the recovered count is the only field explicitly coerced to int
            value = int(value)

        # first record of a region creates its (empty) lists in both dicts
        history = total.setdefault(region, [])
        changes = daily.setdefault(region, [])

        # a day-over-day change needs a previous value, so the first record
        # of a region contributes only to the cumulative series
        if history:
            changes.append(value - history[-1])
        history.append(value)

    # Disabled: per-region mortality / recovery rates (need TOTyr / TOTyk dicts).
#    if entry["totale_casi"]:
#        TOTyr.setdefault(region, []).append(entry["deceduti"] / entry["totale_casi"])
#        TOTyk.setdefault(region, []).append(entry["dimessi_guariti"] / entry["totale_casi"])
#    else:
#        TOTyr.setdefault(region, []).append(.0)
#        TOTyk.setdefault(region, []).append(.0)
In [7]:
# Markdown bullet list linking to each region's "## <region>" heading;
# the anchor is the region name with spaces turned into hyphens.
menu_items = [
    "- [{}](#{})\n".format(name, name.replace(" ", "-"))
    for name in TOTyC
]
menu = "#### REGIONI:\n" + "".join(menu_items)

display(Markdown(menu))
In [8]:
# Region name as it appears in the DPC JSON ("denominazione_regione") ->
# value looked up in the "sub_region_1" column of the Google mobility table.
# Both autonomous provinces (P.A. Bolzano, P.A. Trento) share one mobility entry.
dpc_to_google = dict((
    ("Abruzzo", "Abruzzo"),
    ("Valle d'Aosta", "Aosta"),
    ("Puglia", "Apulia"),
    ("Basilicata", "Basilicata"),
    ("Calabria", "Calabria"),
    ("Campania", "Campania"),
    ("Emilia-Romagna", "Emilia-Romagna"),
    ("Friuli Venezia Giulia", "Friuli-Venezia Giulia"),
    ("Lazio", "Lazio"),
    ("Liguria", "Liguria"),
    ("Lombardia", "Lombardy"),
    ("Marche", "Marche"),
    ("Molise", "Molise"),
    ("Piemonte", "Piedmont"),
    ("Sardegna", "Sardinia"),
    ("Sicilia", "Sicily"),
    ("P.A. Bolzano", "Trentino-South Tyrol"),
    ("P.A. Trento", "Trentino-South Tyrol"),
    ("Toscana", "Tuscany"),
    ("Umbria", "Umbria"),
    ("Veneto", "Veneto"),
))
In [9]:
# Google mobility report: read the local pickle cache when it exists and
# download + cache the CSV only when it does not, so the cell also works on a
# machine where the cache file has never been created.
GOOGLE_MOBILITY_URL = "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv"
GOOGLE_MOBILITY_CACHE = "google-mobility.pkl"

try:
    # NOTE: unpickling can execute arbitrary code; only load a cache file
    # that was produced by this notebook.
    google = pd.read_pickle(GOOGLE_MOBILITY_CACHE)
except FileNotFoundError:
    google = pd.read_csv(
        GOOGLE_MOBILITY_URL,
        parse_dates=['date'], dtype={"sub_region_1": str, "sub_region_2": str},
        index_col=["date"]
    )
    google.to_pickle(GOOGLE_MOBILITY_CACHE)

# Rows for Italy only.
ITALY = google.loc[google["country_region_code"] == "IT"]
# Keep the rows that carry a sub_region_1 value (drops the rows where it is missing).
Regional = ITALY.loc[ITALY["sub_region_1"].notna()]
In [13]:
# R_t estimates loaded from a local pickle; nothing in the cells visible here
# creates this file, so it must already exist next to the notebook.
# The plotting loop below selects one region with Rts.loc[region] and reads the
# 'ML', 'Low_50', 'High_50', 'Low_95' and 'High_95' columns, with dates taken
# from the index level named 'data'.
# NOTE(review): unpickling can execute arbitrary code — only load a trusted file.
Rts = pd.read_pickle("Rt-regions-time-series.pkl")
In [14]:
#_R0 = {}

# Dates drawn as vertical markers on the R_t and mobility charts
# (trace names "lockdown" and "1st relax").
LOCKDOWN_START = pd.Timestamp("2020-03-11")
LOCKDOWN_RELAX = pd.Timestamp("2020-05-04")


def _add_lockdown_markers(fig, y_low, y_high):
    """Add the two dotted date lines and the shaded area between them to ``fig``.

    The dotted lines always span y = -100..100; the shaded polygon spans
    ``y_low``..``y_high``.
    """
    for name, when in (("lockdown", LOCKDOWN_START), ("1st relax", LOCKDOWN_RELAX)):
        fig.add_trace(
            go.Scatter(
                x=[when, when], y=[-100, 100],
                showlegend=False, name=name, mode="lines",
                line=dict(color='black', width=2, dash='dot'),
            )
        )
    fig.add_trace(
        go.Scatter(
            x=[LOCKDOWN_RELAX, LOCKDOWN_RELAX, LOCKDOWN_START, LOCKDOWN_START],
            y=[y_low, y_high, y_high, y_low],
            fill="toself", fillcolor="rgba(125,125,125,.05)",
            showlegend=False, name="lockdown \U00000394",
            # mode="lines" on both charts so the polygon never shows corner markers
            line_color="rgba(0,0,0,0)", mode="lines",
        )
    )


def _add_credible_band(fig, index, days, low, high, label, line_color):
    """Add a lower and an upper bound trace, with the area between them filled.

    ``low`` / ``high`` are the bound values observed at the dates in ``index``.
    They are linearly interpolated (and extrapolated past the ends) onto
    ``days``, and both traces are plotted against ``days`` so x and y always
    have the same length.
    """
    known = date2num(index)
    wanted = date2num(days)
    lower = interp1d(known, low, bounds_error=False, fill_value='extrapolate')
    upper = interp1d(known, high, bounds_error=False, fill_value='extrapolate')

    fig.add_trace(
        go.Scatter(
            x=days, y=lower(wanted),
            line_color=line_color, showlegend=False,
            name="lo{} R<sub>t</sub>".format(label)
        )
    )
    fig.add_trace(
        go.Scatter(
            x=days, y=upper(wanted),
            line_color=line_color, showlegend=False,
            # "tonexty" fills down to the trace added just before (the lower bound)
            fill="tonexty", fillcolor="rgba(0,0,0,.1)",
            name="hi{} R<sub>t</sub>".format(label)
        )
    )


def _plot_overview(region):
    """Show the stacked-bar overview of ``region`` from the notebook-level series."""
    fig = go.Figure()

    stacked = (
        ("Serious", TOTyPint, "red"),
        ("Mild", TOTyPric, "orange"),
        ("Quarantine", TOTyPiso, "yellow"),
        ("Recovered", TOTyR, "lime"),
        ("Deaths", TOTyD, "grey"),
    )
    for label, series, color in stacked:
        fig.add_trace(
            go.Bar(name=label, x=x, y=series[region], marker_color=color)
        )

    for label, series, color in (("Infected", TOTyP, "lightskyblue"), ("Total", TOTyC, "blue")):
        fig.add_trace(
            go.Scatter(
                name=label, x=x, y=series[region], marker_color=color, line_shape='spline',
            )
        )

    fig.update_layout(
        showlegend=True, plot_bgcolor='rgba(0,0,0,0)',
        xaxis={"gridcolor": '#bdbdbd'},
        title={"text": "{} overview".format(region), "xanchor": "center", "x": 0.5},
        hovermode="x unified", barmode='stack'
    )
    fig.update_yaxes(title_text="number", gridcolor='#bdbdbd')
    pyo.iplot(fig)


def _plot_rt(region, result):
    """Show the R_t chart of ``region``.

    ``result`` is the slice of the R_t table for this region; the columns
    'ML', 'Low_50', 'High_50', 'Low_95', 'High_95' are read from it and the
    dates come from the index level named 'data'.
    """
    index = result['ML'].index.get_level_values('data')
    values = result['ML'].values
    rt_max = max(values)

    # Bands are drawn up to one day past the last estimate; the x axis is
    # padded a little further on both sides.
    extended = pd.date_range(start=index[0], end=index[-1]+pd.Timedelta(days=1))
    hyperextended = pd.date_range(start=index[0]-pd.Timedelta(days=1), end=index[-1]+pd.Timedelta(days=2))

    fig = go.Figure()

    # horizontal reference line at R_t = 1
    fig.add_trace(
        go.Scatter(
            x=[index[0]-pd.Timedelta(days=10), index[-1]+pd.Timedelta(days=10)], y=[1, 1],
            line = dict(color='black', width=1), showlegend=False,
        )
    )
    _add_lockdown_markers(fig, 0, rt_max + 1)

    # 50% band first, then 95% band (each pair must stay adjacent for "tonexty")
    _add_credible_band(
        fig, index, extended,
        result['Low_50'].values, result['High_50'].values,
        "50", "rgba(0,0,0,.25)",
    )
    _add_credible_band(
        fig, index, extended,
        result['Low_95'].values, result['High_95'].values,
        "95", "rgba(0,0,0,.1)",
    )

    fig.add_trace(
        go.Scatter(
            x=index, y=values,
            marker=dict(
                size=7,
                line=dict(width=1, color="black"),
                color=values,
                cmin=0,
                cmax=rt_max,
                colorbar=dict(
                    title="R<sub>t</sub>"
                ),
                # green at 0, white where the value equals 1, red at the maximum
                colorscale=[
                    [0., "rgba(0,150,0,1)"],
                    [1./rt_max, "rgba(255, 255, 255, 1)"],
                    [1., "rgba(255,0,0,1)"],
                ]
            ),
            mode="markers+lines", showlegend=False,
            line = dict(color='grey', width=2, dash='dot'),
            name="R<sub>t</sub>",
        )
    )
    fig.update_layout(legend_orientation="h",
        showlegend=True, plot_bgcolor='rgba(0,0,0,0)',
        yaxis={"gridcolor": '#bdbdbd', "zerolinecolor": '#969696', "range":[0, rt_max+1]},
        xaxis={"gridcolor": '#bdbdbd', "range":[hyperextended[0], hyperextended[-1]]},
        title={"text": "Real time {} R<sub>t</sub>".format(region), "xanchor": "center", "x": 0.5},
        yaxis_title="$R_t$", hovermode="x unified"
    )
    pyo.iplot(fig)


def _plot_mobility(region, df):
    """Show one line per mobility column of ``df`` (the rows selected for ``region``)."""
    if df.empty:
        # the title below needs df.index[-1]; with no rows there is nothing to plot
        display(Markdown("*No Google mobility data for {}*".format(region)))
        return

    fig = go.Figure()
    # NOTE(review): assumes the first four columns are identifiers and every
    # later column is a mobility series — confirm against the table layout.
    for column in df.columns[4:]:
        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[column],
                name=column.replace("_", " ").title().split(" Percent")[0]
            )
        )
    _add_lockdown_markers(fig, -100, 100)
    fig.update_layout(
        legend_orientation="h",
        showlegend=True, plot_bgcolor='rgba(0,0,0,0)',
        yaxis={"gridcolor": '#bdbdbd', "zerolinecolor": '#969696'},
        xaxis={"gridcolor": '#bdbdbd'},
        title={"text": f"{region} mobility (Google) up to {df.index[-1].date()}", "xanchor": "center", "x": 0.5},
        yaxis_title="Percentage Change from Baseline", hovermode="x unified"
    )
    pyo.iplot(fig)


for region in TOTyC:
    # total amounts
    display(Markdown("***"))
    display(Markdown('## {}'.format(region)))
    _plot_overview(region)

    # Rt
    _plot_rt(region, Rts.loc[region])

#    # daily variations
#    fig = go.Figure()
#    fig.add_trace(go.Scatter(
#        x=x[1:], y=yC[region],
#        mode='lines+markers', line_color="blue",
#        marker_size=3, marker_symbol="circle",
#        line_shape='spline',
#        name="cases"
#    ))
#    fig.add_trace(go.Scatter(
#        x=x[1:], y=yD[region],
#        mode='lines+markers', line_color="red",
#        marker_size=3, marker_symbol="diamond",
#        line_shape='spline',
#        name="deaths"
#    ))
#    fig.add_trace(go.Scatter(
#        x=x[1:], y=yR[region],
#        mode='lines+markers', line_color="green",
#        marker_size=3, marker_symbol="square",
#        line_shape='spline',
#        name="recovered"
#    ))
#    fig.add_trace(go.Scatter(
#        x=x[1:], y=yP[region],
#        mode='lines+markers', line_color="lightskyblue",
#        marker_size=3, marker_symbol="x",
#        line_shape='spline',
#        name="infected"
#    ))
#    # details
#    fig.add_trace(go.Scatter(
#        x=x[1:], y=yPric[region],
#        mode='lines+markers', line_color="magenta",
#        marker_size=3, marker_symbol="cross",
#        line_shape='spline',
#        name="hospitalized"
#    ))
#    fig.add_trace(go.Scatter(
#        x=x[1:], y=yPint[region],
#        mode='lines+markers', line_color="orange",
#        marker_size=3, marker_symbol="cross",
#        line_shape='spline',
#        name="emergency"
#
#    ))
#    fig.add_trace(go.Scatter(
#        x=x[1:], y=yPiso[region],
#        mode='lines+markers', line_color="black",
#        marker_size=3, marker_symbol="cross",
#        line_shape='spline',
#        name="home isolated"
#    ))
#    
#    fig.update_layout(legend_orientation="h",
#        showlegend=True, plot_bgcolor='rgba(0,0,0,0)', 
#        yaxis={"gridcolor": '#bdbdbd', "zerolinecolor": '#969696'},
#        xaxis={"gridcolor": '#bdbdbd'},
#        title={"text": "ITALY {} (new/day)".format(region), "xanchor": "center", "x": 0.5},
#        yaxis_title="number", hovermode="x"
#    )
#    pyo.iplot(fig)

    # google mobility
    _plot_mobility(region, Regional[Regional["sub_region_1"] == dpc_to_google[region]])
    
#    L = max(TOTyC[region])
#    x0 = np.median(_x)
#    k = np.log(np.median(TOTyC[region])) / x0
#    popt, _ = curve_fit(logistic, _x, TOTyC[region], p0=[L, x0, k], method="dogbox")
#    C = logistic(_x, *popt)
#    
#    r = np.array(TOTyD[region]) + np.array(TOTyR[region])
#    L = max(r)
#    x0 = np.median(_x)
#    k = np.log(np.median(r)) / x0
#    try:
#        popt, _ = curve_fit(logistic, _x, r, p0=[L, x0, k], method="dogbox")
#        R = logistic(_x, *popt)
#    except:
#        popt, _ = curve_fit(logistic, _x, r, p0=[L * 2, x0, k], method="dogbox")
#        R = logistic(_x, *popt)
#    
#    I = np.array(C) - np.array(R)
#    
#    fig = go.Figure()
#    fig.add_trace(
#        go.Scatter(
#            x=x, y=TOTyP[region],
#            mode="markers", marker_symbol="square-open", marker_color="blue",
#            name="I"
#        )
#    )
#    fig.add_trace(
#        go.Scatter(
#            x=x, y=I, line_color="blue",
#            name="I fit"
#        )
#   )
#    
#    fig.add_trace(
#        go.Scatter(
#            x=x, y=r,
#            mode="markers", marker_symbol="circle-open", marker_color="red",
#            name="R"
#        )
#    )
#    fig.add_trace(
#        go.Scatter(
#            x=x, y=R,
#            name="R fit", line_color="red",
#        )
#    )
#    fig.update_layout(
#        title={"text": "{} (SIR model fitted data)".format(region), "xanchor": "center", "x": 0.5},
#    )
#    pyo.iplot(fig)
#    
#    display(Markdown('---'))
#    
#    R0 = []
#    for i in range(_x[-1]):
#        R0.append(
#            (I[i] - I[i-1]) / (R[i] - R[i-1]) + 1
#        )
#    
#    _R0.update({region: R0[-1]})
#    
#    fig = go.Figure()
#    fig.add_trace(
#        go.Scatter(
#            x=x[2:], y=R0[1:],
#            name="R<sub>0</sub>"
#        )
#    )
#    fig.add_trace(
#        go.Scatter(
#            x=[x[2], x[-1]],
#            y=[1, 1],
#            fill="tozeroy",
#            line_color="rgba(0, 0, 0, 0)",
#            fillcolor="rgba(0, 250, 0, .25)",
#            name="R<sub>0</sub> < 1"
#        )
#    )
#    fig.update_layout(
#        showlegend=True, plot_bgcolor='rgba(0,0,0,0)', 
#        yaxis={"gridcolor": '#bdbdbd', "zerolinecolor": '#969696'},
#        xaxis={"gridcolor": '#bdbdbd'},
#        title={"text": "{} (R<sub>0</sub>)".format(region), "xanchor": "center", "x": 0.5},
#        yaxis_title="$R_0$", hovermode="x unified"
#    )
#    pyo.iplot(fig)
    
#with open("R0_regioni.json", "w") as f:
#    json.dump(_R0, f)

Abruzzo


Basilicata


P.A. Bolzano


Calabria


Campania


Emilia-Romagna


Friuli Venezia Giulia


Lazio


Liguria


Lombardia


Marche


Molise


Piemonte


Puglia


Sardegna


Sicilia


Toscana


P.A. Trento


Umbria


Valle d'Aosta


Veneto

In [ ]:
 

© 2020 Max Pierini & Sandra Mazzoli & Alessio Pamovio

Exported from italy/regions-single.ipynb committed by Max Pierini on Mon Aug 31 10:59:12 2020 revision 1, 8eb7ef8